* Load the input dataset; -clear- discards whatever is currently in memory.
use "merge_contact_networks_full.dta", clear

* ---------------------------------------------------------------------------
* Region names for state-level forecasts
*
* For observations whose outcome_type is "State winner", overwrite region
* with "<State name> (United States)" based on the numeric code in state.
* Rows with any other outcome_type, or with a code not listed below, keep
* their existing region value.
* ---------------------------------------------------------------------------

* Code 0 uses its own wording rather than the "<name> (United States)" pattern.
replace region = "United States (INAP)" if state == 0 & outcome_type == "State winner"

* Lookup stored as alternating <code> "<name>" pairs so that each code sits
* next to its name. Code 11 (District of Columbia) was missing from the
* original list even though every other assigned state-level FIPS code in
* the 1-56 range (and the territories) was present; it is included here so
* that DC rows are labelled like all other states.
local fips_lookup                                                              ///
     1 "Alabama"         2 "Alaska"          4 "Arizona"                       ///
     5 "Arkansas"        6 "California"      8 "Colorado"                      ///
     9 "Connecticut"    10 "Delaware"       11 "District of Columbia"          ///
    12 "Florida"        13 "Georgia"        15 "Hawaii"                        ///
    16 "Idaho"          17 "Illinois"       18 "Indiana"                       ///
    19 "Iowa"           20 "Kansas"         21 "Kentucky"                      ///
    22 "Louisiana"      23 "Maine"          24 "Maryland"                      ///
    25 "Massachusetts"  26 "Michigan"       27 "Minnesota"                     ///
    28 "Mississippi"    29 "Missouri"       30 "Montana"                       ///
    31 "Nebraska"       32 "Nevada"         33 "New Hampshire"                 ///
    34 "New Jersey"     35 "New Mexico"     36 "New York"                      ///
    37 "North Carolina" 38 "North Dakota"   39 "Ohio"                          ///
    40 "Oklahoma"       41 "Oregon"         42 "Pennsylvania"                  ///
    44 "Rhode Island"   45 "South Carolina" 46 "South Dakota"                  ///
    47 "Tennessee"      48 "Texas"          49 "Utah"                          ///
    50 "Vermont"        51 "Virginia"       53 "Washington"                    ///
    54 "West Virginia"  55 "Wisconsin"      56 "Wyoming"                       ///
    60 "American Samoa" 66 "Guam"           69 "Northern Mariana Islands"      ///
    72 "Puerto Rico"    78 "Virgin Islands"

* Consume the lookup two tokens at a time: first the numeric code, then the
* quoted name (gettoken strips the surrounding double quotes by default).
while `"`fips_lookup'"' != "" {
    gettoken fips_code  fips_lookup : fips_lookup
    gettoken state_name fips_lookup : fips_lookup
    replace region = "`state_name' (United States)" ///
        if state == `fips_code' & outcome_type == "State winner"
}

* ---------------------------------------------------------------------------
* Variable housekeeping: discard unused columns, give the harmonized
* variables a common _hrm suffix, build care_hrm, and collapse the
* margin/enep pairs into single variables.
* ---------------------------------------------------------------------------

* Columns that are not kept. This stays a single -drop- so that variable-name
* resolution is evaluated against the same set of variables as before.
drop age ces_year wave4_5 wave11_12 wave17_18 CITY mayorpref reppreference   ///
     primattention dempreference                                             ///
     at_n ca_n ca_d ca_r ca_m de_n de_r gb_n gb_d il_n nz_n nz_d it_n pt_n    ///
     es_n us_n us_s us_p                                                     ///
     country_level sample survey_year answer_choices survey_type wt_strategy ///
     observations total house code district_unique state_unique id type      ///
     system electionid system2 type2 pollmarcat study_id_n election_unique   ///
     care_whole

* The *_3pts variables (and the one *_4pts variable) all become *_hrm.
foreach stem in discussion disagreement size wishful {
    rename `stem'_3pts `stem'_hrm
}
rename expertise_4pts expertise_hrm

* care_hrm starts as a copy of care and is then dichotomised for the listed
* studies only: 1-2 -> 0, and 3-4 or 3-5 -> 1 depending on the study.
* Observations from any other study keep the copied care value.
generate care_hrm = care

recode care_hrm (1/2 = 0) (3/4 = 1)                                ///
    if inlist(study_id, "us2000-ANES-USH", "il2009-INES-N")

recode care_hrm (1/2 = 0) (3/5 = 1)                                ///
    if inlist(study_id, "us2016-ANES-N", "us2020-ANES-N",          ///
                        "de2009-GLES-N", "de2013-GLES-N",          ///
                        "de2017-GLES-N", "de2021-GLES-N")

* state is renamed to fips; -label values- with no label name detaches any
* value label so the variable shows its numeric codes.
rename state fips
label values fips

* age was dropped above; age2 takes over that name.
rename age2  age
rename study study_name

* Convert election_year from string to numeric in place.
destring election_year, replace

* For district-level elections the *_d value replaces the *_w value; the
* result is then stored under the bare name and the *_d column is removed.
foreach stat in margin enep {
    replace `stat'_w = `stat'_d if election_level == "District"
    rename  `stat'_w `stat'
    drop    `stat'_d
}

* ---------------------------------------------------------------------------
* Variable labels, grouped by theme. The statements are independent of one
* another except for the final loop, which reads labels set earlier here.
* ---------------------------------------------------------------------------

* --- Identifiers and study / election metadata ---
label variable election        "Election"
label variable election_year   "Election year"
label variable election_type   "Type of election"
label variable election_level  "Election level"
label variable election_system "Election system"
label variable study_id        "Study ID"
label variable study_name      "Study name"
label variable survey_name     "Survey name"
label variable origid          "ID from original data file"

* --- Geography ---
label variable region          "Region"
label variable fips            "FIPS state code"
label variable district_code   "District code"
label variable district_name   "District name"

* --- Respondent characteristics ---
label variable time            "Number of days before the election"
label variable male            "Respondent's gender"
label variable age             "Respondent's age"
label variable education       "Education"
label variable postsecondary   "Postsecondary education"
label variable interest        "Interest for campaign/politics"
label variable news            "News attentiveness"
label variable knowledge       "Factual political knowledge"
label variable closeness       "Perceived election closeness"

* --- Measures that also have a harmonized (_hrm) counterpart ---
label variable wishful         "Partisan preference"
label variable care            "Caring for election outcome"
label variable discussion      "Political discussion"
label variable disagreement    "Political disagreement"
label variable size            "Network size"
label variable expertise       "Network expertise"

* --- Election context and polls ---
label variable margin          "Margin of victory"
label variable enep            "Effective number of electoral parties"
label variable reelected       "Party reelected in district?"
label variable polldate        "Vote intention survey date"
label variable pollmar         "Vote intention margin (1st - 2nd)"
label variable misleading      "Misleading survey"

* --- Forecast ---
label variable forecast        "Election forecast"
label variable outcome_type    "Forecasted outcome"
label variable forecast_type   "Type of forecast"

* Each harmonized variable gets its base variable's label followed by
* " (harmonized)".
foreach stem in wishful care discussion disagreement size expertise {
    local base_label : variable label `stem'
    label variable `stem'_hrm `"`base_label' (harmonized)"'
}

* ---------------------------------------------------------------------------
* Standardized knowledge
*
* knowledge is z-scored within each survey (survey_name) and then split into
* a 0/1 indicator, knowledge_hrm: 0 when below the survey mean, 1 when above.
* ---------------------------------------------------------------------------

* Count, for each observation with a non-missing origid, how many other
* observations share its (survey_name, origid, knowledge) combination.
* Observations with a missing origid get a missing tag.
duplicates tag survey_name origid knowledge if origid != ., generate(duplicates)

* Mean and SD are computed per survey over observations that have at most one
* duplicate. Observations failing that condition (including those with a
* missing tag, since missing compares as larger than 2) receive missing
* mean/SD and therefore a missing standardized score.
* NOTE(review): observations with exactly one duplicate enter the mean/SD
* once per copy - confirm this weighting is intended.
egen study_group = group(survey_name)
sort study_group
by study_group: egen knowledge_mean = mean(knowledge) if duplicates < 2
by study_group: egen knowledge_sd   = sd(knowledge)   if duplicates < 2

* Plain row-wise arithmetic, so no by-prefix is needed here.
generate knowledge_std = (knowledge - knowledge_mean) / knowledge_sd

* Dichotomise around the survey mean. A score of exactly 0, or a missing
* score, leaves knowledge_hrm missing.
generate knowledge_hrm = 0 if knowledge_std < 0
replace  knowledge_hrm = 1 if knowledge_std > 0 & !missing(knowledge_std)

* Label knowledge_hrm the same way as the other harmonized variables
* (base label + " (harmonized)"); it was previously saved unlabelled.
label variable knowledge_hrm "Factual political knowledge (harmonized)"

* Keep only observations that have a forecast.
drop if forecast == .

* Intermediate columns of the standardization are not kept.
* NOTE(review): the helper variables duplicates and study_group are NOT
* dropped and therefore remain in the saved file - confirm whether
* downstream code relies on them before removing.
drop knowledge_mean knowledge_sd knowledge_std

* Put the variables in their final column order; any variable not listed
* follows after these.
order election election_year election_type election_level election_system ///
      study_id study_name survey_name origid                               ///
      region fips district_code district_name                              ///
      time male age education postsecondary interest news                  ///
      knowledge knowledge_hrm wishful wishful_hrm care care_hrm closeness   ///
      discussion discussion_hrm disagreement disagreement_hrm              ///
      size size_hrm expertise expertise_hrm                                ///
      margin enep reelected polldate pollmar misleading                    ///
      forecast outcome_type forecast_type

* Renaming does not touch the variable label, so forecast_id keeps whatever
* label study_id carried.
rename study_id forecast_id

save "knewit.dta", replace